package org.indexador.util;

import java.io.*;

import javax.xml.parsers.*;

import org.w3c.dom.*;

/**
 * Small command-line utility that parses the XML file named by
 * {@code Constantes.ARQUIVO_PAGINAS_WIKIPEDIA} into a DOM tree and prints the
 * number of attributes declared on the document's root element.
 */
public class LeitorArquivosDOM {
  /**
   * Parses the configured XML file and prints the attribute count of its root
   * element to standard output. Any failure (I/O, parser configuration, malformed
   * XML) is reported by printing the stack trace.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    try {
      DocumentBuilderFactory fabrica = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = fabrica.newDocumentBuilder();
      // try-with-resources guarantees the file handle is released even if parsing fails.
      try (InputStream is = new FileInputStream(Constantes.ARQUIVO_PAGINAS_WIKIPEDIA)) {
        Document documento = builder.parse(is);
        // Node.getAttributes() returns null for a Document node (only Element nodes
        // carry attributes), so the attributes must be read from the root element.
        Element raiz = documento.getDocumentElement();
        int tamanho = (raiz == null) ? 0 : raiz.getAttributes().getLength();
        System.out.println(tamanho);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
